import numpy as np
import pandas as pd
from copy import deepcopy
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import norm
from itertools import repeat
from numpy.random import normal
from scipy.stats import ttest_1samp
import warnings
warnings.filterwarnings('ignore')
states = pd.read_excel("./messy_data/states.xlsx")
states.drop(states.index[states.Abbreviation == "DC"], inplace=True)
states.columns = ["Jurisdiction", "Jurisdiction Abbreviation"]
states = states.append({"Jurisdiction" : "Federal", "Jurisdiction Abbreviation" : "FED"}, ignore_index=True)
states.head()
malePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Male", header=9, nrows=54).dropna(1, "all").dropna(0)
femalePrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Female", header=9, nrows=54).dropna(1, "all").dropna(0)
totalPrisonerPopulation = pd.read_excel("./messy_data/Prisoners under the jurisdiction of state or federal correctional authorities 1978-2016.xlsx", sheet_name="Total", header=9, nrows=54).dropna(1, "all").dropna(0)
def cleanPopulationTable(table, states):
# add DC data to Federal, and delete it
table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] = table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] + table.loc[table.Jurisdiction == "District of Columbia"].iloc[:, 1:].replace("--", 0)
table.drop(table.index[table.Jurisdiction == "District of Columbia"], inplace=True)
meltedTable = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = "Population")
table.set_index("Jurisdiction", inplace=True)
table.columns = table.columns.astype(np.int)
table.columns.name = "Year"
table = table.astype(np.float)
return table, meltedTable
malePrisonerPopulation, malePrisonerPopulation_melted = cleanPopulationTable(malePrisonerPopulation, states)
femalePrisonerPopulation, femalePrisonerPopulation_melted = cleanPopulationTable(femalePrisonerPopulation, states)
totalPrisonerPopulation, totalPrisonerPopulation_melted = cleanPopulationTable(totalPrisonerPopulation, states)
totalPrisonerPopulation.head()
totalPrisonerPopulation_melted.head()
maleAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Male").dropna().replace("/", np.nan)
maleRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Male").dropna().replace("/", np.nan)
femaleAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Female").dropna().replace("/", np.nan)
femaleRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Female").dropna().replace("/", np.nan)
totalAdmission = pd.read_excel("./messy_data/admissions.xlsx", sheet_name="Total").dropna().replace("/", np.nan)
totalRelease = pd.read_excel("./messy_data/releases.xlsx", sheet_name="Total").dropna().replace("/", np.nan)
def cleanCountTable(table, states, tableType):
table.replace([".", "/"], [np.nan, np.nan], inplace=True)
for i, row in table.iterrows():
if row.isnull().any():
table.loc[i, 1 :] = pd.to_numeric(row.iloc[1 :]).interpolate()
# add DC data to Federal, and delete it
table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] = table.loc[table.Jurisdiction == "Federal"].iloc[:, 1:] + table.loc[table.Jurisdiction == "District of Columbia"].iloc[:, 1:].replace("--", 0)
table.drop(table.index[table.Jurisdiction == "District of Columbia"], inplace=True)
meltedTable = table.melt(id_vars = "Jurisdiction", var_name = "Year", value_name = tableType)
table.set_index("Jurisdiction", inplace=True)
table.columns = table.columns.astype(np.int)
table.columns.name = "Year"
table = table.astype(np.float)
return table, meltedTable
maleAdmission, maleAdmission_melted = cleanCountTable(maleAdmission, states, "Admission")
maleRelease, maleRelease_melted = cleanCountTable(maleRelease, states, "Release")
femaleAdmission, femaleAdmission_melted = cleanCountTable(femaleAdmission, states, "Admission")
femaleRelease, femaleRelease_melted = cleanCountTable(femaleRelease, states, "Release")
totalAdmission, totalAdmission_melted = cleanCountTable(totalAdmission, states, "Admission")
totalRelease, totalRelease_melted = cleanCountTable(totalRelease, states, "Release")
totalAdmission.head()
totalAdmission_melted.head()
regions = pd.read_excel("./messy_data/state_region.xlsx")
regions.drop(regions.index[regions.State == "District of Columbia"], inplace=True)
regions.head()
def aggregateRegionalPopulationSum(meltedTable, regions):
meltedTable = regions.merge(meltedTable, left_on="State", right_on="Jurisdiction", how="right")
temp_index = meltedTable["Jurisdiction"] == "Federal"
meltedTable.loc[temp_index, "Region"] = "Federal"
meltedTable.loc[temp_index, "Division"] = "Federal"
regionSum = meltedTable.groupby(["Year", "Region"]).Population.sum().unstack(0)
divisionSum = meltedTable.groupby(["Year", "Region", "Division"]).Population.sum().unstack(0)
return regionSum, divisionSum
malePrisonerPopulationRegionSum, malePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(malePrisonerPopulation_melted, regions)
femalePrisonerPopulationRegionSum, femalePrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(femalePrisonerPopulation_melted, regions)
totalPrisonerPopulationRegionSum, totalPrisonerPopulationDivisionSum = aggregateRegionalPopulationSum(totalPrisonerPopulation_melted, regions)
totalPrisonerPopulationRegionSum
totalPrisonerPopulationDivisionSum
def aggregateRegionalCountSum(meltedTable, regions, tableType):
meltedTable = regions.merge(meltedTable, left_on="State", right_on="Jurisdiction", how="right")
temp_index = meltedTable["Jurisdiction"] == "Federal"
meltedTable.loc[temp_index, "Region"] = "Federal"
meltedTable.loc[temp_index, "Division"] = "Federal"
regionSum = meltedTable.groupby(["Year", "Region"])[tableType].sum().unstack(0)
divisionSum = meltedTable.groupby(["Year", "Region", "Division"])[tableType].sum().unstack(0)
return regionSum, divisionSum
maleAdmissionRegionSum, maleAdmissionDivisionSum = aggregateRegionalCountSum(maleAdmission_melted, regions, "Admission")
maleReleaseRegionSum, maleReleaseDivisionSum = aggregateRegionalCountSum(maleRelease_melted, regions, "Release")
femaleAdmissionRegionSum, femaleAdmissionDivisionSum = aggregateRegionalCountSum(femaleAdmission_melted, regions, "Admission")
femaleReleaseRegionSum, femaleReleaseDivisionSum = aggregateRegionalCountSum(femaleRelease_melted, regions, "Release")
totalAdmissionRegionSum, totalAdmissionDivisionSum = aggregateRegionalCountSum(totalAdmission_melted, regions, "Admission")
totalReleaseRegionSum, totalReleaseDivisionSum = aggregateRegionalCountSum(femaleRelease_melted, regions, "Release")
totalAdmissionRegionSum
totalAdmissionDivisionSum
capacity = pd.read_excel("./messy_data/facility capacity 2016.xlsx").replace(["…", "/"], np.nan)
capacity = capacity.set_index("Jurisdiction").max(axis=1)
capacity.dropna(inplace=True)
capacity = capacity.astype(np.float)
capacity.name = "Capacity"
capacity.head()
capacitySum = regions.merge(capacity, left_on="State",right_on="Jurisdiction")
capacitySum.columns = ["Jurisdiction", "Division", "Region", "Capacity"]
capacitySum.iloc[-1] = ["Federal", "Federal", "Federal", capacity["Federal"]]
capacitySum = capacitySum[["Region", "Division", "Jurisdiction", "Capacity"]]
capacityRegionSum = capacitySum.groupby(["Region"]).Capacity.sum()
capacityDivisionSum = capacitySum.groupby(["Region", "Division"]).Capacity.sum()
capacityRegionSum
capacityDivisionSum
annualSentence = pd.read_excel("./messy_data/national sentencing.xlsx").iloc[:, [0,1]]
annualSentence.columns = ["Year", "Sentence"]
annualSentence.set_index("Year", inplace=True)
annualSentence = annualSentence.iloc[:-1,0]
annualSentence.plot()
plt.xticks(np.array(range(1995, 2017, 3)))
plt.ylabel("Sentence [Year]")
plt.title("Average National Sentencing")
def extractAreaPopulationCount(table, area, tableType="Population", year_begin=1978,year_end=2016):
# Extract a series of population, admission or release data for an area (state, region or division)
# area is a pandas index
data = table.loc[area]
data = data[np.logical_and(data.index <= year_end, data.index >= year_begin)]
data.name = tableType
data.index.name="Year"
return data
def propogateMu(past_mu,past_p, current_p, current_a,current_r, current_y):
# State propogation function derived from the equilibrium of population * mu
return (past_p * (past_mu - 1) + 0.5 * current_r + current_a * (current_y - 0.5)) / current_p
def solveMu(initialMu,initialPopulation, population, admission,release, anualSentence):
# Given the initial state of mu and populatin,
# this function can calculate the evolution of mu
# according to the revolution of population, admission, release, and average sentence
mu = population * 0
mu.name = "Mu"
for i, p in enumerate(population):
if i == 0:
past_p = initialPopulation
past_mu = initialMu
mu.iloc[i] = propogateMu(past_mu, past_p, population.iloc[i], admission.iloc[i],release.iloc[i], anualSentence.iloc[i])
past_mu = mu.iloc[i]
past_p = population.iloc[i]
mu[1994] = initialMu
mu = mu.sort_index()
return mu
def estimateSigmaOfNormalCDF(x,F, mu):
# Given x and F -- CDF at x of a normal distribution with center of mu,
# this function estimate the square root of variance
# This function can take vetor input
# we have function F(x) as the CDF of N(mu, sigma^2)
# given function Phi(x) as the CDF of N(0, 1)
# F(x) = Phi((x - mu) / sigma)
# the reverse function of Phi is norm.ppf()
return (x - mu) / norm.ppf(F)
def estimateSigma(population, release, mu):
mu = mu.values[: -1]
p = population.values[: -1]
r = release.values
F = r / p
sigma = estimateSigmaOfNormalCDF(1,F, mu)
sigma = sigma[sigma >= 0]
return sigma.mean()
def sampleFutureAdmissionSentence(sampleMean,sampleSD, year_end, dataType):
years = range(2017, year_end + 1)
simulated = normal(sampleMean,sampleSD, len(years))
simulated[simulated <= 0.] = sampleMean
if dataType == "Admission":
simulated = np.round(simulated)
simulated = pd.Series(simulated, index=years, name=dataType)
simulated.index.name = "Year"
return simulated
def propogateState(past_mu,past_p, current_a,current_y, epsilon_sd, sigma):
# State propogation function
# This function estimates release as PDF of a normal distribution with mu as the center.
# The population was propogated as increment of difference between admission and release, plus proportional error
if np.isnan(sigma):
current_r = (- past_p + current_a * (current_y - 0.5 + past_mu)) / (past_mu - 0.5)
# This dynamics tries to stablize mu
else:
current_r = np.round(norm(past_mu, sigma).cdf(1) * past_p)
current_p = np.round(past_p + current_a - current_r + normal(scale=epsilon_sd) * past_p)
current_mu = propogateMu(past_mu,past_p, current_p, current_a,current_r, current_y)
return current_mu, current_p, current_r
def solvePopulation(initialMu,initialPopulation, futureAdmission,futureSentence, epsilon_sd, sigma):
futurePopulation = futureAdmission * 0
futurePopulation.name = "Population"
futureRelease = futureAdmission * 0
futureRelease.name = "Release"
futureMu = futureSentence * 0
futureMu.name = "Mu"
for i, p in enumerate(futurePopulation):
if i == 0:
past_mu = initialMu
past_p = initialPopulation
futureMu.iloc[i], futurePopulation.iloc[i], futureRelease.iloc[i] = propogateState(past_mu,past_p, futureAdmission.iloc[i],futureSentence.iloc[i], epsilon_sd, sigma)
past_mu = futureMu.iloc[i]
past_p = futurePopulation.iloc[i]
return futureMu, futurePopulation, futureRelease
def modelAndProject_NSamples_OneArea(populationTable, admissionTable,releaseTable, area, annualSentence, initialMu, projectionYear, N):
# Note: the population, admission and release data range -- 1978-2016
# the anual sentencing data range -- 1995-2016
# N is the number of samples to draw for prediction
# initialMu is the mu for 1994
# projectionYear is the end of projection
# Estimate error term of population increment
population = extractAreaPopulationCount(populationTable, area, "Population")
admission = extractAreaPopulationCount(admissionTable, area, "Admission")
release = extractAreaPopulationCount(releaseTable, area, "Release")
# data range 1978-2017
epsilon = (population.diff() - admission + release).values[1:] / population.values[:-1]
epsilon_sd = epsilon.std()
# Model state propogation
modernPopulation = population.loc[1995 :]
modernAdmission = admission.loc[1995 :]
modernRelease = release.loc[1995 :]
# data range 1995-2016
# estimate mu[t]
initialPopulation = population[1994] # use 1994 for initial population
mu = solveMu(initialMu,initialPopulation, modernPopulation, modernAdmission,modernRelease, annualSentence)
# solveMu already concates initial state of mu to the output
# estimate sigma
modernPopulation = population.loc[1994 :]
# include the initial state of population
sigma = estimateSigma(modernPopulation, modernRelease, mu)
# Project to from 2017 to projectionYear
# Model and sample N sets of future admission and sentence
admissionSD = modernAdmission.std()
futureAdmissions = [sampleFutureAdmissionSentence(modernAdmission.iloc[-1],admissionSD, 2050, "Admission") for i in range(N)]
sentenceSD = annualSentence.std()
futureSentences = [sampleFutureAdmissionSentence(annualSentence.iloc[-1],sentenceSD, 2050, "Sentence") for i in range(N)]
# Project future mu, population and release
futureMus, futurePopulations, futureReleases = zip(*map(solvePopulation, repeat(mu.iloc[-1]),repeat(modernPopulation.iloc[-1]), futureAdmissions,futureSentences, repeat(epsilon_sd), repeat(sigma)))
futurePopulations = pd.concat(futurePopulations, axis=1)
futureAdmissions = pd.concat(futureAdmissions, axis=1)
futureReleases = pd.concat(futureReleases, axis=1)
return futurePopulations, futureAdmissions,futureReleases
def collapseSamples(sampleTable, area):
sampleMean = sampleTable.mean(axis=1)
sampleMean.name = area
sampleSD = sampleTable.std(axis=1)
sampleSD.name = area
return sampleMean, sampleSD
def modelAndProjectPopulation_NSamples(populationTable, admissionTable,releaseTable, annualSentence, initialMu, projectionYear, N):
areas = populationTable.index
futurePopulationMeans = []
futurePopulationSDs = []
futureAdmissionMeans = []
futureAdmissionSDs = []
futureReleaseMeans = []
futureReleaseSDs = []
for area in areas:
futurePopulations, futureAdmissions,futureReleases = modelAndProject_NSamples_OneArea(populationTable, admissionTable,releaseTable, area, annualSentence, initialMu, projectionYear, N)
futurePopulationMean, futurePopulationSD = collapseSamples(futurePopulations, area)
futurePopulationMeans.append(futurePopulationMean)
futurePopulationSDs.append(futurePopulationSD)
futureAdmissionMean, futureAdmissionSD = collapseSamples(futureAdmissions, area)
futureAdmissionMeans.append(futureAdmissionMean)
futureAdmissionSDs.append(futureAdmissionSD)
futureReleaseMean, futureReleaseSD = collapseSamples(futureReleases, area)
futureReleaseMeans.append(futureReleaseMean)
futureReleaseSDs.append(futureReleaseSD)
futurePopulationMeans = pd.concat(futurePopulationMeans, axis=1).transpose()
futureAdmissionMeans = pd.concat(futureAdmissionMeans, axis=1).transpose()
futureReleaseMeans = pd.concat(futureReleaseMeans, axis=1).transpose()
futurePopulationSDs = pd.concat(futurePopulationSDs, axis=1).transpose()
futureAdmissionSDs = pd.concat(futureAdmissionSDs, axis=1).transpose()
futureReleaseSDs = pd.concat(futureReleaseSDs, axis=1).transpose()
return futurePopulationMeans,futurePopulationSDs, futureAdmissionMeans,futureAdmissionSDs, futureReleaseMeans,futureReleaseSDs
plt.rcParams["figure.figsize"] = (10,5)
maleFuturePopulationMean_5,maleFuturePopulationSD_5, maleFutureAdmissionMean_5,maleFutureAdmissionSD_5, maleFutureReleaseMean_5,maleFutureReleaseSD_5 = modelAndProjectPopulation_NSamples(malePrisonerPopulation, maleAdmission,maleRelease, annualSentence, 5, 2050, 250)
maleFuturePopulationMean_10,maleFuturePopulationSD_10, maleFutureAdmissionMean_10,maleFutureAdmissionSD_10, maleFutureReleaseMean_10,maleFutureReleaseSD_10 = modelAndProjectPopulation_NSamples(malePrisonerPopulation, maleAdmission,maleRelease, annualSentence, 10, 2050, 250)
maleFuturePopulationMean_20,maleFuturePopulationSD_20, maleFutureAdmissionMean_20,maleFutureAdmissionSD_20, maleFutureReleaseMean_20,maleFutureReleaseSD_20 = modelAndProjectPopulation_NSamples(malePrisonerPopulation, maleAdmission,maleRelease, annualSentence, 20, 2050, 250)
maleFuturePopulationMean_40,maleFuturePopulationSD_40, maleFutureAdmissionMean_40,maleFutureAdmissionSD_40, maleFutureReleaseMean_40,maleFutureReleaseSD_40 = modelAndProjectPopulation_NSamples(malePrisonerPopulation, maleAdmission,maleRelease, annualSentence, 40, 2050, 250)
femaleFuturePopulationMean_5,femaleFuturePopulationSD_5, femaleFutureAdmissionMean_5,femaleFutureAdmissionSD_5, femaleFutureReleaseMean_5,femaleFutureReleaseSD_5 = modelAndProjectPopulation_NSamples(femalePrisonerPopulation, femaleAdmission,femaleRelease, annualSentence, 5, 2050, 250)
femaleFuturePopulationMean_10,femaleFuturePopulationSD_10, femaleFutureAdmissionMean_10,femaleFutureAdmissionSD_10, femaleFutureReleaseMean_10,femaleFutureReleaseSD_10 = modelAndProjectPopulation_NSamples(femalePrisonerPopulation, femaleAdmission,femaleRelease, annualSentence, 10, 2050, 250)
femaleFuturePopulationMean_20,femaleFuturePopulationSD_20, femaleFutureAdmissionMean_20,femaleFutureAdmissionSD_20, femaleFutureReleaseMean_20,femaleFutureReleaseSD_20 = modelAndProjectPopulation_NSamples(femalePrisonerPopulation, femaleAdmission,femaleRelease, annualSentence, 20, 2050, 250)
femaleFuturePopulationMean_40,femaleFuturePopulationSD_40, femaleFutureAdmissionMean_40,femaleFutureAdmissionSD_40, femaleFutureReleaseMean_40,femaleFutureReleaseSD_40 = modelAndProjectPopulation_NSamples(femalePrisonerPopulation, femaleAdmission,femaleRelease, annualSentence, 40, 2050, 250)
maleFuturePopulationRegionSumMean_5,maleFuturePopulationRegionSumSD_5, maleFutureAdmissionRegionSumMean_5,maleFutureAdmissionRegionSumSD_5, maleFutureReleaseRegionSumMean_5,maleFutureReleaseRegionSumSD_5 = modelAndProjectPopulation_NSamples(malePrisonerPopulationRegionSum, maleAdmissionRegionSum,maleReleaseRegionSum, annualSentence, 5, 2050, 250)
maleFuturePopulationRegionSumMean_10,maleFuturePopulationRegionSumSD_10, maleFutureAdmissionRegionSumMean_10,maleFutureAdmissionRegionSumSD_10, maleFutureReleaseRegionSumMean_10,maleFutureReleaseRegionSumSD_10 = modelAndProjectPopulation_NSamples(malePrisonerPopulationRegionSum, maleAdmissionRegionSum,maleReleaseRegionSum, annualSentence, 10, 2050, 250)
maleFuturePopulationRegionSumMean_20,maleFuturePopulationRegionSumSD_20, maleFutureAdmissionRegionSumMean_20,maleFutureAdmissionRegionSumSD_20, maleFutureReleaseRegionSumMean_20,maleFutureReleaseRegionSumSD_20 = modelAndProjectPopulation_NSamples(malePrisonerPopulationRegionSum, maleAdmissionRegionSum,maleReleaseRegionSum, annualSentence, 20, 2050, 250)
maleFuturePopulationRegionSumMean_40,maleFuturePopulationRegionSumSD_40, maleFutureAdmissionRegionSumMean_40,maleFutureAdmissionRegionSumSD_40, maleFutureReleaseRegionSumMean_40,maleFutureReleaseRegionSumSD_40 = modelAndProjectPopulation_NSamples(malePrisonerPopulationRegionSum, maleAdmissionRegionSum,maleReleaseRegionSum, annualSentence, 40, 2050, 250)
femaleFuturePopulationRegionSumMean_5,femaleFuturePopulationRegionSumSD_5, femaleFutureAdmissionRegionSumMean_5,femaleFutureAdmissionRegionSumSD_5, femaleFutureReleaseRegionSumMean_5,femaleFutureReleaseRegionSumSD_5 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationRegionSum, femaleAdmissionRegionSum,femaleReleaseRegionSum, annualSentence, 5, 2050, 250)
femaleFuturePopulationRegionSumMean_10,femaleFuturePopulationRegionSumSD_10, femaleFutureAdmissionRegionSumMean_10,femaleFutureAdmissionRegionSumSD_10, femaleFutureReleaseRegionSumMean_10,femaleFutureReleaseRegionSumSD_10 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationRegionSum, femaleAdmissionRegionSum,femaleReleaseRegionSum, annualSentence, 10, 2050, 250)
femaleFuturePopulationRegionSumMean_20,femaleFuturePopulationRegionSumSD_20, femaleFutureAdmissionRegionSumMean_20,femaleFutureAdmissionRegionSumSD_20, femaleFutureReleaseRegionSumMean_20,femaleFutureReleaseRegionSumSD_20 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationRegionSum, femaleAdmissionRegionSum,femaleReleaseRegionSum, annualSentence, 20, 2050, 250)
femaleFuturePopulationRegionSumMean_40,femaleFuturePopulationRegionSumSD_40, femaleFutureAdmissionRegionSumMean_40,femaleFutureAdmissionRegionSumSD_40, femaleFutureReleaseRegionSumMean_40,femaleFutureReleaseRegionSumSD_40 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationRegionSum, femaleAdmissionRegionSum,femaleReleaseRegionSum, annualSentence, 40, 2050, 250)
maleFuturePopulationDivisionSumMean_5,maleFuturePopulationDivisionSumSD_5, maleFutureAdmissionDivisionSumMean_5,maleFutureAdmissionDivisionSumSD_5, maleFutureReleaseDivisionSumMean_5,maleFutureReleaseDivisionSumSD_5 = modelAndProjectPopulation_NSamples(malePrisonerPopulationDivisionSum, maleAdmissionDivisionSum,maleReleaseDivisionSum, annualSentence, 5, 2050, 250)
maleFuturePopulationDivisionSumMean_10,maleFuturePopulationDivisionSumSD_10, maleFutureAdmissionDivisionSumMean_10,maleFutureAdmissionDivisionSumSD_10, maleFutureReleaseDivisionSumMean_10,maleFutureReleaseDivisionSumSD_10 = modelAndProjectPopulation_NSamples(malePrisonerPopulationDivisionSum, maleAdmissionDivisionSum,maleReleaseDivisionSum, annualSentence, 10, 2050, 250)
maleFuturePopulationDivisionSumMean_20,maleFuturePopulationDivisionSumSD_20, maleFutureAdmissionDivisionSumMean_20,maleFutureAdmissionDivisionSumSD_20, maleFutureReleaseDivisionSumMean_20,maleFutureReleaseDivisionSumSD_20 = modelAndProjectPopulation_NSamples(malePrisonerPopulationDivisionSum, maleAdmissionDivisionSum,maleReleaseDivisionSum, annualSentence, 20, 2050, 250)
maleFuturePopulationDivisionSumMean_40,maleFuturePopulationDivisionSumSD_40, maleFutureAdmissionDivisionSumMean_40,maleFutureAdmissionDivisionSumSD_40, maleFutureReleaseDivisionSumMean_40,maleFutureReleaseDivisionSumSD_40 = modelAndProjectPopulation_NSamples(malePrisonerPopulationDivisionSum, maleAdmissionDivisionSum,maleReleaseDivisionSum, annualSentence, 40, 2050, 250)
femaleFuturePopulationDivisionSumMean_5,femaleFuturePopulationDivisionSumSD_5, femaleFutureAdmissionDivisionSumMean_5,femaleFutureAdmissionDivisionSumSD_5, femaleFutureReleaseDivisionSumMean_5,femaleFutureReleaseDivisionSumSD_5 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationDivisionSum, femaleAdmissionDivisionSum,femaleReleaseDivisionSum, annualSentence, 5, 2050, 250)
femaleFuturePopulationDivisionSumMean_10,femaleFuturePopulationDivisionSumSD_10, femaleFutureAdmissionDivisionSumMean_10,femaleFutureAdmissionDivisionSumSD_10, femaleFutureReleaseDivisionSumMean_10,femaleFutureReleaseDivisionSumSD_10 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationDivisionSum, femaleAdmissionDivisionSum,femaleReleaseDivisionSum, annualSentence, 10, 2050, 250)
femaleFuturePopulationDivisionSumMean_20,femaleFuturePopulationDivisionSumSD_20, femaleFutureAdmissionDivisionSumMean_20,femaleFutureAdmissionDivisionSumSD_20, femaleFutureReleaseDivisionSumMean_20,femaleFutureReleaseDivisionSumSD_20 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationDivisionSum, femaleAdmissionDivisionSum,femaleReleaseDivisionSum, annualSentence, 20, 2050, 250)
femaleFuturePopulationDivisionSumMean_40,femaleFuturePopulationDivisionSumSD_40, femaleFutureAdmissionDivisionSumMean_40,femaleFutureAdmissionDivisionSumSD_40, femaleFutureReleaseDivisionSumMean_40,femaleFutureReleaseDivisionSumSD_40 = modelAndProjectPopulation_NSamples(femalePrisonerPopulationDivisionSum, femaleAdmissionDivisionSum,femaleReleaseDivisionSum, annualSentence, 40, 2050, 250)
def plotFederalProjection_GenderPopulation(population,futurePopulation,futurePopulation_error, admission,futureAdmission,futureAdmission_error, release,futureRelease,futureRelease_error, sex,initialMu,maxPopulation):
pd.concat([population, futurePopulation]).plot(color="darkorange", label="population")
plt.fill_between(futurePopulation.index, futurePopulation - futurePopulation_error, futurePopulation + futurePopulation_error, color="darkorange", alpha=0.25)
pd.concat([admission, futureAdmission]).plot(color="crimson", label="admission")
plt.fill_between(futureAdmission.index, futureAdmission - futurePopulation_error, futureAdmission + futureAdmission_error, color="crimson", alpha=0.25)
pd.concat([release, futureRelease]).plot(color="steelblue", label="release")
plt.fill_between(futureRelease.index, futureRelease - futureRelease_error, futureRelease + futureRelease_error, color="steelblue", alpha=0.25)
plt.axvline(1995, color="grey")
plt.legend(loc="upper left")
plt.ylim([0, maxPopulation])
plt.ylabel("Head Count")
plt.title("Projection of Federal " + sex + " Prisoner Population, mu[1994]=" + str(initialMu))
plt.show()
def plotRegionalProjection_GenderPopulation(population,futurePopulation,futurePopulation_error, sex,initialMu,maxPopulation):
population = population.sort_index().drop("Federal").transpose()
futurePopulation = futurePopulation.sort_index().drop("Federal").transpose()
futurePopulation_error = futurePopulation_error.drop("Federal").sort_index().transpose()
pd.concat([population, futurePopulation]).plot()
plt.gca().set_prop_cycle(None)
for i in range(population.shape[1]):
plt.fill_between(futurePopulation.index, futurePopulation.iloc[:, i] - futurePopulation_error.iloc[:, i], futurePopulation.iloc[:, i] + futurePopulation_error.iloc[:, i], alpha=0.25)
plt.axvline(1995, color="grey")
plt.legend(loc="upper left")
plt.ylim([0, maxPopulation])
plt.ylabel("Head Count")
plt.title("Projection of Regional " + sex + " Prisoner Population, mu[1994]=" + str(initialMu))
plt.show()
def alignDivisionTable(table, divisionTable, regions):
regions = regions.set_index("State")
divisionTable = divisionTable.drop("Federal")
divisionTable = divisionTable.sort_index()
table = regions.join(table).reset_index().set_index(["Region","Division", "State"]).sort_index()
return table, divisionTable
def plotDivisionalPopulationProjection_GenderPopulation(population,futurePopulation,futurePopulation_error, divisionalPopulation,futureDivisionalPopulation,futureDivisionalPopulation_error, sex,initialMu):
population, divisionalPopulation = alignDivisionTable(population, divisionalPopulation, regions)
futurePopulation, futureDivisionalPopulation = alignDivisionTable(futurePopulation, futureDivisionalPopulation, regions)
futurePopulation_error, futureDivisionalPopulation_error = alignDivisionTable(futurePopulation_error, futureDivisionalPopulation_error, regions)
regionList = population.index.get_level_values(0).unique()
for region in regionList:
divisionalPopulation_1Region = divisionalPopulation.loc[region]
futureDivisionalPopulation_1Region = futureDivisionalPopulation.loc[region]
futureDivisionalPopulation_error_1Region = futureDivisionalPopulation_error.loc[region]
plt.figure()
pd.concat([divisionalPopulation_1Region, futureDivisionalPopulation_1Region], axis=1).transpose().plot()
plt.legend(loc="upper left")
plt.gca().set_prop_cycle(None)
for index, row in futureDivisionalPopulation_1Region.iterrows():
plt.fill_between(row.index.values, row - futureDivisionalPopulation_error_1Region.loc[index], row + futureDivisionalPopulation_error_1Region.loc[index], alpha=0.25)
plt.axvline(1995, color="grey")
plt.ylabel("Population")
plt.title(region + " " + sex + " Population, mu[1994]=" + str(initialMu))
population_1Region = population.loc[region]
futurePopulation_1Region = futurePopulation.loc[region]
futurePopulation_error_1Region = futurePopulation_error.loc[region]
divisions = population_1Region.index.get_level_values(0).unique()
for division in divisions:
population_1Division = population_1Region.loc[division]
futurePopulation_1Division = futurePopulation_1Region.loc[division]
futurePopulation_error_1Division = futurePopulation_error_1Region.loc[division]
plt.figure()
pd.concat([population_1Division, futurePopulation_1Division], axis=1).transpose().plot()
plt.legend(loc="upper left")
plt.gca().set_prop_cycle(None)
for index, row in futurePopulation_1Division.iterrows():
plt.fill_between(row.index.values.astype(int), row - futurePopulation_error_1Division.loc[index], row + futurePopulation_error_1Division.loc[index], alpha=0.25)
plt.axvline(1995, color="grey")
plt.ylabel("Population")
plt.title(region + " -- " + division + " " + sex + " Population, mu[1994]=" + str(initialMu))
plotFederalProjection_GenderPopulation(malePrisonerPopulation.loc["Federal"], maleFuturePopulationMean_5.loc["Federal"],maleFuturePopulationSD_5.loc["Federal"],maleAdmission.loc["Federal"], maleFutureAdmissionMean_5.loc["Federal"],maleFutureAdmissionSD_5.loc["Federal"], maleRelease.loc["Federal"], maleFutureReleaseMean_5.loc["Federal"],maleFutureReleaseSD_5.loc["Federal"], "Male", 5, 210000)
plotRegionalProjection_GenderPopulation(malePrisonerPopulationRegionSum, maleFuturePopulationRegionSumMean_5,maleFuturePopulationRegionSumSD_5, "Male", 5, 800000)
plotDivisionalPopulationProjection_GenderPopulation(malePrisonerPopulation,maleFuturePopulationMean_5,maleFuturePopulationSD_5, malePrisonerPopulationDivisionSum,maleFuturePopulationDivisionSumMean_5,maleFuturePopulationDivisionSumSD_5, "Male",5)
plotFederalProjection_GenderPopulation(malePrisonerPopulation.loc["Federal"], maleFuturePopulationMean_10.loc["Federal"],maleFuturePopulationSD_10.loc["Federal"],maleAdmission.loc["Federal"], maleFutureAdmissionMean_10.loc["Federal"],maleFutureAdmissionSD_10.loc["Federal"], maleRelease.loc["Federal"], maleFutureReleaseMean_10.loc["Federal"],maleFutureReleaseSD_10.loc["Federal"], "Male", 10, 210000)
plotRegionalProjection_GenderPopulation(malePrisonerPopulationRegionSum, maleFuturePopulationRegionSumMean_10,maleFuturePopulationRegionSumSD_10, "Male", 10, 800000)
plotDivisionalPopulationProjection_GenderPopulation(malePrisonerPopulation,maleFuturePopulationMean_10,maleFuturePopulationSD_10, malePrisonerPopulationDivisionSum,maleFuturePopulationDivisionSumMean_10,maleFuturePopulationDivisionSumSD_10, "Male",10)
plotFederalProjection_GenderPopulation(malePrisonerPopulation.loc["Federal"], maleFuturePopulationMean_20.loc["Federal"],maleFuturePopulationSD_20.loc["Federal"],maleAdmission.loc["Federal"], maleFutureAdmissionMean_20.loc["Federal"],maleFutureAdmissionSD_20.loc["Federal"], maleRelease.loc["Federal"], maleFutureReleaseMean_20.loc["Federal"],maleFutureReleaseSD_20.loc["Federal"], "Male", 20, 210000)
plotRegionalProjection_GenderPopulation(malePrisonerPopulationRegionSum, maleFuturePopulationRegionSumMean_20,maleFuturePopulationRegionSumSD_20, "Male", 20, 800000)
plotDivisionalPopulationProjection_GenderPopulation(malePrisonerPopulation,maleFuturePopulationMean_20,maleFuturePopulationSD_20, malePrisonerPopulationDivisionSum,maleFuturePopulationDivisionSumMean_20,maleFuturePopulationDivisionSumSD_20, "Male",20)
femalePrisonerPopulationRegionSum
plotFederalProjection_GenderPopulation(malePrisonerPopulation.loc["Federal"], maleFuturePopulationMean_40.loc["Federal"],maleFuturePopulationSD_40.loc["Federal"],maleAdmission.loc["Federal"], maleFutureAdmissionMean_40.loc["Federal"],maleFutureAdmissionSD_40.loc["Federal"], maleRelease.loc["Federal"], maleFutureReleaseMean_40.loc["Federal"],maleFutureReleaseSD_40.loc["Federal"], "Male", 40, 210000)
plotRegionalProjection_GenderPopulation(malePrisonerPopulationRegionSum, maleFuturePopulationRegionSumMean_40,maleFuturePopulationRegionSumSD_40, "Male", 40, 800000)
plotDivisionalPopulationProjection_GenderPopulation(malePrisonerPopulation,maleFuturePopulationMean_40,maleFuturePopulationSD_40, malePrisonerPopulationDivisionSum,maleFuturePopulationDivisionSumMean_40,maleFuturePopulationDivisionSumSD_40, "Male",40)
plotFederalProjection_GenderPopulation(femalePrisonerPopulation.loc["Federal"], femaleFuturePopulationMean_5.loc["Federal"],femaleFuturePopulationSD_5.loc["Federal"],femaleAdmission.loc["Federal"], femaleFutureAdmissionMean_5.loc["Federal"],femaleFutureAdmissionSD_5.loc["Federal"], femaleRelease.loc["Federal"], femaleFutureReleaseMean_5.loc["Federal"],femaleFutureReleaseSD_5.loc["Federal"], "Female", 5, 18000)
plotRegionalProjection_GenderPopulation(femalePrisonerPopulationRegionSum, femaleFuturePopulationRegionSumMean_5,femaleFuturePopulationRegionSumSD_5, "Female", 1, 85000)
plotDivisionalPopulationProjection_GenderPopulation(femalePrisonerPopulation,femaleFuturePopulationMean_5,femaleFuturePopulationSD_5, femalePrisonerPopulationDivisionSum,femaleFuturePopulationDivisionSumMean_5,femaleFuturePopulationDivisionSumSD_5, "Female",1)
plotFederalProjection_GenderPopulation(femalePrisonerPopulation.loc["Federal"], femaleFuturePopulationMean_10.loc["Federal"],femaleFuturePopulationSD_10.loc["Federal"],femaleAdmission.loc["Federal"], femaleFutureAdmissionMean_10.loc["Federal"],femaleFutureAdmissionSD_10.loc["Federal"], femaleRelease.loc["Federal"], femaleFutureReleaseMean_10.loc["Federal"],femaleFutureReleaseSD_10.loc["Federal"], "Female", 10, 18000)
plotRegionalProjection_GenderPopulation(femalePrisonerPopulationRegionSum, femaleFuturePopulationRegionSumMean_10,femaleFuturePopulationRegionSumSD_10, "Female", 2, 85000)
plotDivisionalPopulationProjection_GenderPopulation(femalePrisonerPopulation,femaleFuturePopulationMean_10,femaleFuturePopulationSD_10, femalePrisonerPopulationDivisionSum,femaleFuturePopulationDivisionSumMean_10,femaleFuturePopulationDivisionSumSD_10, "Female",2)
plotFederalProjection_GenderPopulation(femalePrisonerPopulation.loc["Federal"], femaleFuturePopulationMean_20.loc["Federal"],femaleFuturePopulationSD_20.loc["Federal"],femaleAdmission.loc["Federal"], femaleFutureAdmissionMean_20.loc["Federal"],femaleFutureAdmissionSD_20.loc["Federal"], femaleRelease.loc["Federal"], femaleFutureReleaseMean_20.loc["Federal"],femaleFutureReleaseSD_20.loc["Federal"], "Female", 20, 18000)
plotRegionalProjection_GenderPopulation(femalePrisonerPopulationRegionSum, femaleFuturePopulationRegionSumMean_20,femaleFuturePopulationRegionSumSD_20, "Female", 3, 85000)
plotDivisionalPopulationProjection_GenderPopulation(femalePrisonerPopulation,femaleFuturePopulationMean_20,femaleFuturePopulationSD_20, femalePrisonerPopulationDivisionSum,femaleFuturePopulationDivisionSumMean_20,femaleFuturePopulationDivisionSumSD_20, "Female",3)
plotFederalProjection_GenderPopulation(femalePrisonerPopulation.loc["Federal"], femaleFuturePopulationMean_40.loc["Federal"],femaleFuturePopulationSD_40.loc["Federal"],femaleAdmission.loc["Federal"], femaleFutureAdmissionMean_40.loc["Federal"],femaleFutureAdmissionSD_40.loc["Federal"], femaleRelease.loc["Federal"], femaleFutureReleaseMean_40.loc["Federal"],femaleFutureReleaseSD_40.loc["Federal"], "Female", 40, 18000)
plotRegionalProjection_GenderPopulation(femalePrisonerPopulationRegionSum, femaleFuturePopulationRegionSumMean_40,femaleFuturePopulationRegionSumSD_40, "Female", 4, 85000)
plotDivisionalPopulationProjection_GenderPopulation(femalePrisonerPopulation,femaleFuturePopulationMean_40,femaleFuturePopulationSD_40, femalePrisonerPopulationDivisionSum,femaleFuturePopulationDivisionSumMean_40,femaleFuturePopulationDivisionSumSD_40, "Female",4)
def modelAndProjectCapacity_NSamples(populationTable, admissionTable,releaseTable, annualSentence, capacity, initialMu, projectionYear, N):
areas = populationTable.index
futureOccupancies = {}
futureOccupancyMean = []
futureOccupancySD = []
for area in areas:
if area in capacity.index:
futurePopulation, _,_ = modelAndProject_NSamples_OneArea(populationTable, admissionTable,releaseTable, area, annualSentence, initialMu, projectionYear, N)
# futurePopulation.sort_index(inplace=True)
futureOccupancy = futurePopulation / capacity[area]
futureOccupancies[area] = futureOccupancy
occupancyMean = futureOccupancy.mean(axis=1)
occupancyMean.index.name = "Year"
occupancyMean.name = area
occupancySD = futureOccupancy.std(axis=1)
occupancySD.index.name = "Year"
occupancySD.name = area
futureOccupancyMean.append(occupancyMean)
futureOccupancySD.append(occupancySD)
futureOccupancyMean = pd.concat(futureOccupancyMean, axis=1).transpose()
futureOccupancyMean.index.name = "Jurisdiction"
futureOccupancySD = pd.concat(futureOccupancySD, axis=1).transpose()
futureOccupancySD.index.name = "Jurisdiction"
return futureOccupancies, futureOccupancyMean,futureOccupancySD
futureOccupancies_5, futureOccupancyMean_5,futureOccupancySD_5 = modelAndProjectCapacity_NSamples(totalPrisonerPopulation, totalAdmission,totalRelease, annualSentence, capacity, 5, 2050, 250)
futureOccupancies_10, futureOccupancyMean_10,futureOccupancySD_10 = modelAndProjectCapacity_NSamples(totalPrisonerPopulation, totalAdmission,totalRelease, annualSentence, capacity, 10, 2050, 250)
futureOccupancies_20, futureOccupancyMean_20,futureOccupancySD_20 = modelAndProjectCapacity_NSamples(totalPrisonerPopulation, totalAdmission,totalRelease, annualSentence, capacity, 20, 2050, 250)
futureOccupancies_40, futureOccupancyMean_40,futureOccupancySD_40 = modelAndProjectCapacity_NSamples(totalPrisonerPopulation, totalAdmission,totalRelease, annualSentence, capacity, 40, 2050, 250)
def orderCapacityByRegion(table, regions):
table = futureOccupancyMean_5
regions = regions.set_index("State")
table = pd.concat((regions, table), axis=1)
table.index.name = "State"
table.loc["Federal","Region"] = "Federal"
table.loc["Federal","Division"] = "Federal"
table = table.dropna(axis=0)
table = table.reset_index().set_index(["Region", "Division", "State"]).sort_index()
table.columns.name = "Year"
table.columns = table.columns.astype(int)
return table
def plotOccupancyProjection(occupancy, occupancy_error, initialMu):
occupancy = orderCapacityByRegion(occupancy, regions)
occupancy_error = orderCapacityByRegion(occupancy_error, regions)
for region in occupancy.index.get_level_values(0).unique():
for division in occupancy.loc[region].index.get_level_values(0).unique():
occupancy_1Division = occupancy.loc[region, division]
occupancy_error_1Division = occupancy_error.loc[region, division]
plt.figure()
occupancy_1Division.transpose().plot()
plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), fancybox=True, shadow=True, ncol=occupancy_1Division.shape[0])
# plt.gca().set_prop_cycle(None)
# for i, row in occupancy_1Division.iterrows():
# row_error = occupancy_error_1Division.loc[i]
# plt.fill_between(row.index, row - row_error, row + row_error, alpha=0.1)
if region != "Federal":
plt.title(region + " -- " + division + " Occupancy Projection, mu[1994]=" + str(initialMu))
else:
plt.title("Federal Occupancy Projection, mu[1994]=" + str(initialMu))
plt.ylabel("Occupancy")
def checkOccupancyThresholdCrossing(occupancies, upThreshold,downThreshold, initialMu):
crossingupYear = {}
crossingdownYear = {}
for area in occupancies:
occupancy = occupancies[area]
t, p_up = ttest_1samp(occupancy.values.transpose(), upThreshold)
p_up = p_up/2
logical_index = np.logical_and(p_up < 0.05, occupancy.mean(axis=1) > upThreshold)
if logical_index.sum() > 0:
crossingupYear[area] = occupancy.index[logical_index].min()
t, p_down = ttest_1samp(occupancy.values.transpose(), downThreshold)
p_up = p_up/2
logical_index = np.logical_and(p_up < 0.05, occupancy.mean(axis=1) < downThreshold)
if logical_index.sum() > 0:
crossingdownYear[area] = occupancy.index[logical_index].min()
crossingupYear = pd.Series(crossingupYear).sort_values()
crossingdownYear = pd.Series(crossingdownYear).sort_values()
print("mu[1994] = " + str(initialMu))
if not crossingupYear.empty:
print("Year when occupancy rises above " + str(upThreshold) + ":")
print(crossingupYear)
if not crossingupYear.empty:
print("Year when occupancy drops below " + str(downThreshold) + ":")
print(crossingdownYear)
return crossingupYear, crossingdownYear
plotOccupancyProjection(futureOccupancyMean_5,futureOccupancySD_5, 5)
crossingupYear, crossingdownYear = checkOccupancyThresholdCrossing(futureOccupancies_5, 1.5,1, 5)
plotOccupancyProjection(futureOccupancyMean_10,futureOccupancySD_10, 10)
crossingupYear, crossingdownYear = checkOccupancyThresholdCrossing(futureOccupancies_10, 1.5,1, 10)
plotOccupancyProjection(futureOccupancyMean_20,futureOccupancySD_20, 20)
crossingupYear, crossingdownYear = checkOccupancyThresholdCrossing(futureOccupancies_20, 1.5,1, 20)
plotOccupancyProjection(futureOccupancyMean_20,futureOccupancySD_40, 40)
crossingupYear, crossingdownYear = checkOccupancyThresholdCrossing(futureOccupancies_20, 1.5,1, 40)